Carga y limpieza preliminar de los datos
Los datos que se van a analizar en este documento proceden de la compilación realizada por un usuario de Kaggle.
# Load the COVID-19 CSV into a pandas DataFrame and preview it.
import pandas as pd

datos = pd.read_csv(filepath_or_buffer="covid_19_clean_complete.csv")
# Show the first ten rows.
datos.head(n=10)
## Province/State ... WHO Region
## 0 NaN ... Eastern Mediterranean
## 1 NaN ... Europe
## 2 NaN ... Africa
## 3 NaN ... Europe
## 4 NaN ... Africa
## 5 NaN ... Americas
## 6 NaN ... Americas
## 7 NaN ... Europe
## 8 Australian Capital Territory ... Western Pacific
## 9 New South Wales ... Western Pacific
##
## [10 rows x 10 columns]
# Same load from R: bring in the pandas module with import(), read the CSV
# through it and render the first ten rows as a table.
pd <- import(module = "pandas")
datos <- pd$read_csv("covid_19_clean_complete.csv")
kable(x = head(datos, n = 10))
|
Province/State
|
Country/Region
|
Lat
|
Long
|
Date
|
Confirmed
|
Deaths
|
Recovered
|
Active
|
WHO Region
|
|
NaN
|
Afghanistan
|
33.93911
|
67.70995
|
2020-01-22
|
0
|
0
|
0
|
0
|
Eastern Mediterranean
|
|
NaN
|
Albania
|
41.15330
|
20.16830
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
NaN
|
Algeria
|
28.03390
|
1.65960
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
NaN
|
Andorra
|
42.50630
|
1.52180
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
NaN
|
Angola
|
-11.20270
|
17.87390
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
NaN
|
Antigua and Barbuda
|
17.06080
|
-61.79640
|
2020-01-22
|
0
|
0
|
0
|
0
|
Americas
|
|
NaN
|
Argentina
|
-38.41610
|
-63.61670
|
2020-01-22
|
0
|
0
|
0
|
0
|
Americas
|
|
NaN
|
Armenia
|
40.06910
|
45.03820
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
Australian Capital Territory
|
Australia
|
-35.47350
|
149.01240
|
2020-01-22
|
0
|
0
|
0
|
0
|
Western Pacific
|
|
New South Wales
|
Australia
|
-33.86880
|
151.20930
|
2020-01-22
|
0
|
0
|
0
|
0
|
Western Pacific
|
# Read the CSV with read.csv() instead.
datos <- read.csv(file = "covid_19_clean_complete.csv")
# Plain alternative for the preview: kable(head(datos, 10))
# Pipeline version: first ten rows rendered as a styled table.
datos %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling()
|
Province.State
|
Country.Region
|
Lat
|
Long
|
Date
|
Confirmed
|
Deaths
|
Recovered
|
Active
|
WHO.Region
|
|
|
Afghanistan
|
33.93911
|
67.70995
|
2020-01-22
|
0
|
0
|
0
|
0
|
Eastern Mediterranean
|
|
|
Albania
|
41.15330
|
20.16830
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
|
Algeria
|
28.03390
|
1.65960
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
|
Andorra
|
42.50630
|
1.52180
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
|
Angola
|
-11.20270
|
17.87390
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
|
Antigua and Barbuda
|
17.06080
|
-61.79640
|
2020-01-22
|
0
|
0
|
0
|
0
|
Americas
|
|
|
Argentina
|
-38.41610
|
-63.61670
|
2020-01-22
|
0
|
0
|
0
|
0
|
Americas
|
|
|
Armenia
|
40.06910
|
45.03820
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
Australian Capital Territory
|
Australia
|
-35.47350
|
149.01240
|
2020-01-22
|
0
|
0
|
0
|
0
|
Western Pacific
|
|
New South Wales
|
Australia
|
-33.86880
|
151.20930
|
2020-01-22
|
0
|
0
|
0
|
0
|
Western Pacific
|
Estructura de los datos
# Print a compact overview of the data frame: one line per column with its
# type and first values.
str(datos)
## 'data.frame': 49068 obs. of 10 variables:
## $ Province.State: chr "" "" "" "" ...
## $ Country.Region: chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ Lat : num 33.9 41.2 28 42.5 -11.2 ...
## $ Long : num 67.71 20.17 1.66 1.52 17.87 ...
## $ Date : chr "2020-01-22" "2020-01-22" "2020-01-22" "2020-01-22" ...
## $ Confirmed : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Deaths : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Recovered : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Active : int 0 0 0 0 0 0 0 0 0 0 ...
## $ WHO.Region : chr "Eastern Mediterranean" "Europe" "Africa" "Europe" ...
# Replace the original column names with Spanish ones.
# Latitud: north is positive, south negative.
# Longitud: east is positive, west negative.
names(datos) <- c(
  "Provincia_Estado", "Pais_Region", "Latitud", "Longitud", "Fecha",
  "Casos_Confirmados", "Casos_Muertos", "Casos_Recuperados",
  "Casos_Activos", "Region_WHO"
)
# Preview the renamed data as a styled table.
kable_styling(kable(head(datos)))
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
|
|
Afghanistan
|
33.93911
|
67.70995
|
2020-01-22
|
0
|
0
|
0
|
0
|
Eastern Mediterranean
|
|
|
Albania
|
41.15330
|
20.16830
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
|
Algeria
|
28.03390
|
1.65960
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
|
Andorra
|
42.50630
|
1.52180
|
2020-01-22
|
0
|
0
|
0
|
0
|
Europe
|
|
|
Angola
|
-11.20270
|
17.87390
|
2020-01-22
|
0
|
0
|
0
|
0
|
Africa
|
|
|
Antigua and Barbuda
|
17.06080
|
-61.79640
|
2020-01-22
|
0
|
0
|
0
|
0
|
Americas
|
# Convert the columns to the types used in the rest of the analysis.
# Province/state goes through as.vector() first and is then made a factor.
datos$Provincia_Estado <- as.factor(as.vector(datos$Provincia_Estado))
datos$Pais_Region %<>% as.factor()
# Dates are parsed with as.Date(); the commented-out alternative in the
# original was `datos$Fecha %<>% ymd()`.
datos$Fecha %<>% as.Date()
# Plain assignment here; the compound-assignment pipe `%<>%` used above is an
# equivalent, shorter spelling.
datos$Region_WHO <- as.factor(datos$Region_WHO)
# Check the resulting column types.
str(datos)
## 'data.frame': 49068 obs. of 10 variables:
## $ Provincia_Estado : Factor w/ 79 levels "","Alberta","Anguilla",..: 1 1 1 1 1 1 1 1 6 47 ...
## $ Pais_Region : Factor w/ 187 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 9 ...
## $ Latitud : num 33.9 41.2 28 42.5 -11.2 ...
## $ Longitud : num 67.71 20.17 1.66 1.52 17.87 ...
## $ Fecha : Date, format: "2020-01-22" "2020-01-22" ...
## $ Casos_Confirmados: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Casos_Muertos : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Casos_Recuperados: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Casos_Activos : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Region_WHO : Factor w/ 6 levels "Africa","Americas",..: 3 4 1 4 1 2 2 4 6 6 ...
\[Confirmados = Muertos + Recuperados + Activos\]
# If the active-cases column were missing, it could be derived from the others:
#   datos %<>% mutate(Casos_activos = Casos_Confirmados - Casos_Muertos - Casos_Recuperados)
# First ten records with more than 10000 confirmed cases.
kable(head(filter(datos, Casos_Confirmados > 10000), n = 10))
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-02
|
11177
|
350
|
295
|
10532
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-03
|
13522
|
414
|
386
|
12722
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-04
|
16678
|
479
|
522
|
15677
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-05
|
19665
|
549
|
633
|
18483
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-06
|
22112
|
618
|
817
|
20677
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-07
|
24953
|
699
|
1115
|
23139
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-08
|
27100
|
780
|
1439
|
24881
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-09
|
29631
|
871
|
1795
|
26965
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-10
|
31728
|
974
|
2222
|
28532
|
Western Pacific
|
|
Hubei
|
China
|
30.9756
|
112.2707
|
2020-02-11
|
33366
|
1068
|
2639
|
29659
|
Western Pacific
|
# Look for records that break the equation, i.e. a negative number of
# active cases, sorted by province/state and then by date.
datos %>%
  filter(Casos_Activos < 0) %>%
  arrange(Provincia_Estado, Fecha) %>%
  kable() %>%
  kable_styling()
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
|
|
Liechtenstein
|
47.140000
|
9.55000
|
2020-06-23
|
82
|
2
|
81
|
-1
|
Europe
|
|
|
Uganda
|
1.373333
|
32.29028
|
2020-07-20
|
1069
|
0
|
1071
|
-2
|
Africa
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-05-23
|
558
|
45
|
515
|
-2
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-05-24
|
558
|
45
|
517
|
-4
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-05-25
|
559
|
45
|
517
|
-3
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-05-30
|
560
|
45
|
525
|
-10
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-05-31
|
560
|
45
|
528
|
-13
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-06-01
|
560
|
45
|
528
|
-13
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.372300
|
-2.36440
|
2020-06-02
|
560
|
46
|
528
|
-14
|
Europe
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-24
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-25
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-26
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-27
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-28
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-29
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-30
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-03-31
|
168
|
6
|
168
|
-6
|
Western Pacific
|
|
Hainan
|
China
|
19.195900
|
109.74530
|
2020-04-01
|
168
|
6
|
168
|
-6
|
Western Pacific
|
# Take a closer look at Hainan: its first ten records.
datos %>%
  filter(Provincia_Estado == "Hainan") %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling()
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-22
|
4
|
0
|
0
|
4
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-23
|
5
|
0
|
0
|
5
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-24
|
8
|
0
|
0
|
8
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-25
|
19
|
0
|
0
|
19
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-26
|
22
|
0
|
0
|
22
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-27
|
33
|
1
|
0
|
32
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-28
|
40
|
1
|
0
|
39
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-29
|
43
|
1
|
0
|
42
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-30
|
46
|
1
|
1
|
44
|
Western Pacific
|
|
Hainan
|
China
|
19.1959
|
109.7453
|
2020-01-31
|
52
|
1
|
1
|
50
|
Western Pacific
|
# Correct the erroneous Hainan rows (in this case the origin of the error is
# known): the negative active count is added to the recovered count and the
# active cases are set to zero.
# Note: the corrected rows are only printed; nothing is assigned back to
# `datos`.
datos %>%
  filter(Provincia_Estado == "Hainan" & Casos_Activos < 0) %>%
  mutate(
    Casos_Recuperados = Casos_Recuperados + Casos_Activos,
    Casos_Activos = 0
  )
## Provincia_Estado Pais_Region Latitud Longitud Fecha Casos_Confirmados
## 1 Hainan China 19.1959 109.7453 2020-03-24 168
## 2 Hainan China 19.1959 109.7453 2020-03-25 168
## 3 Hainan China 19.1959 109.7453 2020-03-26 168
## 4 Hainan China 19.1959 109.7453 2020-03-27 168
## 5 Hainan China 19.1959 109.7453 2020-03-28 168
## 6 Hainan China 19.1959 109.7453 2020-03-29 168
## 7 Hainan China 19.1959 109.7453 2020-03-30 168
## 8 Hainan China 19.1959 109.7453 2020-03-31 168
## 9 Hainan China 19.1959 109.7453 2020-04-01 168
## Casos_Muertos Casos_Recuperados Casos_Activos Region_WHO
## 1 6 162 0 Western Pacific
## 2 6 162 0 Western Pacific
## 3 6 162 0 Western Pacific
## 4 6 162 0 Western Pacific
## 5 6 162 0 Western Pacific
## 6 6 162 0 Western Pacific
## 7 6 162 0 Western Pacific
## 8 6 162 0 Western Pacific
## 9 6 162 0 Western Pacific
Análisis geográfico
# Select Europe geographically with a bounding rectangle (the north side is
# left unbounded).
# Option 1, base R indexing:
#   datos_europa = datos[datos$Latitud>38 & datos$Longitud>-25 & datos$Longitud<30,]
# Option 2, dplyr:
datos_europa <- datos %>%
  filter(Latitud > 38, Longitud >= -25, Longitud <= 30)
# Number of records per country inside the rectangle. The table is turned into
# a data frame because filter() does not work on a table object; countries
# with zero records (outside the rectangle) are dropped.
as.data.frame(table(datos_europa$Pais_Region)) %>%
  filter(Freq > 0) %>%
  kable() %>%
  kable_styling()
|
Var1
|
Freq
|
|
Albania
|
188
|
|
Andorra
|
188
|
|
Austria
|
188
|
|
Belarus
|
188
|
|
Belgium
|
188
|
|
Bosnia and Herzegovina
|
188
|
|
Bulgaria
|
188
|
|
Croatia
|
188
|
|
Czechia
|
188
|
|
Denmark
|
376
|
|
Estonia
|
188
|
|
Finland
|
188
|
|
France
|
188
|
|
Germany
|
188
|
|
Greece
|
188
|
|
Holy See
|
188
|
|
Hungary
|
188
|
|
Iceland
|
188
|
|
Ireland
|
188
|
|
Italy
|
188
|
|
Kosovo
|
188
|
|
Latvia
|
188
|
|
Liechtenstein
|
188
|
|
Lithuania
|
188
|
|
Luxembourg
|
188
|
|
Moldova
|
188
|
|
Monaco
|
188
|
|
Montenegro
|
188
|
|
Netherlands
|
188
|
|
North Macedonia
|
188
|
|
Norway
|
188
|
|
Poland
|
188
|
|
Portugal
|
188
|
|
Romania
|
188
|
|
San Marino
|
188
|
|
Serbia
|
188
|
|
Slovakia
|
188
|
|
Slovenia
|
188
|
|
Spain
|
188
|
|
Sweden
|
188
|
|
Switzerland
|
188
|
|
United Kingdom
|
564
|
# State of Europe on the day of Spain's lockdown.
datos_europa %>%
  filter(Fecha == as.Date("2020-03-15")) %>%
  kable() %>%
  kable_styling()
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
|
|
Albania
|
41.15330
|
20.168300
|
2020-03-15
|
42
|
1
|
0
|
41
|
Europe
|
|
|
Andorra
|
42.50630
|
1.521800
|
2020-03-15
|
1
|
0
|
1
|
0
|
Europe
|
|
|
Austria
|
47.51620
|
14.550100
|
2020-03-15
|
860
|
1
|
6
|
853
|
Europe
|
|
|
Belarus
|
53.70980
|
27.953400
|
2020-03-15
|
27
|
0
|
3
|
24
|
Europe
|
|
|
Belgium
|
50.83330
|
4.469936
|
2020-03-15
|
886
|
4
|
1
|
881
|
Europe
|
|
|
Bosnia and Herzegovina
|
43.91590
|
17.679100
|
2020-03-15
|
24
|
0
|
0
|
24
|
Europe
|
|
|
Bulgaria
|
42.73390
|
25.485800
|
2020-03-15
|
51
|
2
|
0
|
49
|
Europe
|
|
|
Croatia
|
45.10000
|
15.200000
|
2020-03-15
|
49
|
0
|
1
|
48
|
Europe
|
|
|
Czechia
|
49.81750
|
15.473000
|
2020-03-15
|
253
|
0
|
0
|
253
|
Europe
|
|
Faroe Islands
|
Denmark
|
61.89260
|
-6.911800
|
2020-03-15
|
11
|
0
|
0
|
11
|
Europe
|
|
|
Denmark
|
56.26390
|
9.501800
|
2020-03-15
|
864
|
2
|
1
|
861
|
Europe
|
|
|
Estonia
|
58.59530
|
25.013600
|
2020-03-15
|
171
|
0
|
1
|
170
|
Europe
|
|
|
Finland
|
61.92411
|
25.748151
|
2020-03-15
|
244
|
0
|
10
|
234
|
Europe
|
|
|
France
|
46.22760
|
2.213700
|
2020-03-15
|
4499
|
91
|
12
|
4396
|
Europe
|
|
|
Germany
|
51.16569
|
10.451526
|
2020-03-15
|
5795
|
11
|
46
|
5738
|
Europe
|
|
|
Greece
|
39.07420
|
21.824300
|
2020-03-15
|
331
|
4
|
8
|
319
|
Europe
|
|
|
Holy See
|
41.90290
|
12.453400
|
2020-03-15
|
1
|
0
|
0
|
1
|
Europe
|
|
|
Hungary
|
47.16250
|
19.503300
|
2020-03-15
|
32
|
1
|
1
|
30
|
Europe
|
|
|
Iceland
|
64.96310
|
-19.020800
|
2020-03-15
|
171
|
5
|
8
|
158
|
Europe
|
|
|
Ireland
|
53.14240
|
-7.692100
|
2020-03-15
|
129
|
2
|
0
|
127
|
Europe
|
|
|
Italy
|
41.87194
|
12.567380
|
2020-03-15
|
24747
|
1809
|
2335
|
20603
|
Europe
|
|
|
Latvia
|
56.87960
|
24.603200
|
2020-03-15
|
30
|
0
|
1
|
29
|
Europe
|
|
|
Liechtenstein
|
47.14000
|
9.550000
|
2020-03-15
|
4
|
0
|
1
|
3
|
Europe
|
|
|
Lithuania
|
55.16940
|
23.881300
|
2020-03-15
|
12
|
0
|
1
|
11
|
Europe
|
|
|
Luxembourg
|
49.81530
|
6.129600
|
2020-03-15
|
59
|
1
|
0
|
58
|
Europe
|
|
|
Moldova
|
47.41160
|
28.369900
|
2020-03-15
|
23
|
0
|
0
|
23
|
Europe
|
|
|
Monaco
|
43.73330
|
7.416700
|
2020-03-15
|
2
|
0
|
0
|
2
|
Europe
|
|
|
Montenegro
|
42.70868
|
19.374390
|
2020-03-15
|
0
|
0
|
0
|
0
|
Europe
|
|
|
Netherlands
|
52.13260
|
5.291300
|
2020-03-15
|
1135
|
20
|
0
|
1115
|
Europe
|
|
|
North Macedonia
|
41.60860
|
21.745300
|
2020-03-15
|
14
|
0
|
1
|
13
|
Europe
|
|
|
Norway
|
60.47200
|
8.468900
|
2020-03-15
|
1221
|
3
|
1
|
1217
|
Europe
|
|
|
Poland
|
51.91940
|
19.145100
|
2020-03-15
|
119
|
3
|
0
|
116
|
Europe
|
|
|
Portugal
|
39.39990
|
-8.224500
|
2020-03-15
|
245
|
0
|
2
|
243
|
Europe
|
|
|
Romania
|
45.94320
|
24.966800
|
2020-03-15
|
131
|
0
|
9
|
122
|
Europe
|
|
|
San Marino
|
43.94240
|
12.457800
|
2020-03-15
|
101
|
5
|
4
|
92
|
Europe
|
|
|
Serbia
|
44.01650
|
21.005900
|
2020-03-15
|
48
|
0
|
0
|
48
|
Europe
|
|
|
Slovakia
|
48.66900
|
19.699000
|
2020-03-15
|
54
|
0
|
0
|
54
|
Europe
|
|
|
Slovenia
|
46.15120
|
14.995500
|
2020-03-15
|
219
|
1
|
0
|
218
|
Europe
|
|
|
Spain
|
40.46367
|
-3.749220
|
2020-03-15
|
7798
|
289
|
517
|
6992
|
Europe
|
|
|
Sweden
|
60.12816
|
18.643501
|
2020-03-15
|
1022
|
3
|
0
|
1019
|
Europe
|
|
|
Switzerland
|
46.81820
|
8.227500
|
2020-03-15
|
2200
|
14
|
4
|
2182
|
Europe
|
|
Channel Islands
|
United Kingdom
|
49.37230
|
-2.364400
|
2020-03-15
|
3
|
0
|
0
|
3
|
Europe
|
|
Isle of Man
|
United Kingdom
|
54.23610
|
-4.548100
|
2020-03-15
|
0
|
0
|
0
|
0
|
Europe
|
|
|
United Kingdom
|
55.37810
|
-3.436000
|
2020-03-15
|
3072
|
43
|
18
|
3011
|
Europe
|
|
|
Kosovo
|
42.60264
|
20.902977
|
2020-03-15
|
0
|
0
|
0
|
0
|
Europe
|
\[d(x, y)= \sqrt{(x_{Lat}-y_{Lat})^2+(x_{Long}-y_{Long})^2}\]
# Helpers for filtering with a geographic circle.

# Euclidean distance, measured in degrees, between two points.
# x, y: numeric vectors whose first element is the latitude and whose second
# element is the longitude.
distancia_grados <- function(x, y) {
  delta_lat <- x[1] - y[1]
  delta_long <- x[2] - y[2]
  sqrt(delta_lat^2 + delta_long^2)
}

# Distance in degrees from the point x (latitude, longitude) to the fixed
# reference point named Potsdam.
# NOTE(review): Potsdam is usually listed near 52.39 N, 13.06 E; confirm that
# the longitude 13.906734 is intended.
distancia_grados_potsdam <- function(x) {
  potsdam <- c(52.366956, 13.906734)
  distancia_grados(x, potsdam)
}
# Distance from every (latitude, longitude) pair of the European data to
# Potsdam; MARGIN = 1 applies the function row by row.
dist_potsdam <- apply(
  cbind(datos_europa$Latitud, datos_europa$Longitud),
  MARGIN = 1,
  FUN = distancia_grados_potsdam
)
# Store the distances as a new column.
datos_europa$dist_potsdam <- dist_potsdam
# Keep the records dated 2 to 7 March 2020 that lie less than 4 degrees from
# Potsdam (a circle centred on Potsdam with a radius of 4 degrees).
datos_europa %>%
  filter(
    between(Fecha, dmy("2-3-2020"), dmy("7-3-2020")),
    dist_potsdam < 4
  ) %>%
  kable() %>%
  kable_styling()
|
Provincia_Estado
|
Pais_Region
|
Latitud
|
Longitud
|
Fecha
|
Casos_Confirmados
|
Casos_Muertos
|
Casos_Recuperados
|
Casos_Activos
|
Region_WHO
|
dist_potsdam
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-02
|
3
|
0
|
0
|
3
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-02
|
159
|
0
|
16
|
143
|
Europe
|
3.658073
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-03
|
5
|
0
|
0
|
5
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-03
|
196
|
0
|
16
|
180
|
Europe
|
3.658073
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-04
|
8
|
0
|
0
|
8
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-04
|
262
|
0
|
16
|
246
|
Europe
|
3.658073
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-05
|
12
|
0
|
0
|
12
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-05
|
482
|
0
|
16
|
466
|
Europe
|
3.658073
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-06
|
18
|
0
|
0
|
18
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-06
|
670
|
0
|
17
|
653
|
Europe
|
3.658073
|
|
|
Czechia
|
49.81750
|
15.47300
|
2020-03-07
|
19
|
0
|
0
|
19
|
Europe
|
2.992142
|
|
|
Germany
|
51.16569
|
10.45153
|
2020-03-07
|
799
|
0
|
18
|
781
|
Europe
|
3.658073
|
# World country data, requested as an sf object at medium scale.
world <- ne_countries(scale = "medium", returnclass = "sf")
# Plain world map: black borders, every country filled in green.
ggplot(world) +
  geom_sf(colour = "black", fill = "green") +
  labs(
    x = "Longitud", y = "Latitud",
    title = "Mapa del mundo", subtitle = "COVID19"
  )

# Same map, but the fill is now mapped to the mapcolor13 column through an
# aesthetic (aes) instead of being a fixed colour.
ggplot(world) +
  geom_sf(aes(fill = mapcolor13), colour = "black") +
  labs(
    x = "Longitud", y = "Latitud",
    title = "Mapa del mundo", subtitle = "COVID19"
  )

# Add a "United States" level so the US can carry the same name in both
# datasets, then rename the "US" records accordingly.
datos$Pais_Region <- factor(
  datos$Pais_Region,
  levels = c(levels(datos$Pais_Region), "United States")
)
datos[datos$Pais_Region == "US", ]$Pais_Region <- "United States"
# Try to join both datasets by country name (names may still be inconsistent
# between the two sets) and map the confirmed cases on 15 March 2020.
world %>%
  inner_join(datos, by = c("name" = "Pais_Region")) %>%
  filter(Fecha == dmy("15-03-2020")) %>%
  ggplot() +
  geom_sf(aes(fill = Casos_Confirmados), colour = "black") +
  scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
  labs(
    x = "Longitud", y = "Latitud",
    title = "Mapa del mundo", subtitle = "COVID19"
  )

# Another date (30 March 2020), after the increase in cases.
world %>%
  inner_join(datos, by = c("name" = "Pais_Region")) %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot() +
  geom_sf(aes(fill = Casos_Confirmados), colour = "black") +
  # Square-root transform of the fill scale for better visualisation.
  scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
  labs(
    x = "Longitud", y = "Latitud",
    title = "Mapa del mundo", subtitle = "COVID19"
  )

# Confirmed cases on 30 March 2020, drawn with a Lambert azimuthal equal-area
# projection centred on Europe (latitude 50, longitude 10).
world %>%
  inner_join(datos, by = c("name" = "Pais_Region")) %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot() +
  geom_sf(color = "black", aes(fill = Casos_Confirmados)) +
  # Projection change. The string previously contained "+ lon_0=10" (a space
  # between "+" and the parameter name), which is not a well-formed PROJ
  # token; it is written as "+lon_0=10" so the central meridian is applied.
  coord_sf(crs = "+proj=laea +lat_0=50 +lon_0=10 +units=m +ellps=GRS80") +
  # Square-root transform of the fill scale for better visualisation.
  scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
  xlab("Longitud") +
  ylab("Latitud") +
  ggtitle("Mapa del mundo", subtitle = "COVID19")

# Show the concentration of cases as points, without needing the base map for
# now: point size follows confirmed cases and colour follows deaths.
datos %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot(aes(x = Longitud, y = Latitud)) +
  geom_point(aes(colour = Casos_Muertos, size = Casos_Confirmados))

# Logarithmic sizes and colours (log(value + 1)) for a better size scale.
datos %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot(aes(x = Longitud, y = Latitud)) +
  geom_point(
    aes(
      size = log(Casos_Confirmados + 1),
      colour = log(Casos_Muertos + 1)
    )
  ) +
  # Fixed aspect ratio to adjust the size of the map.
  coord_fixed() +
  # Legend at the bottom so it does not take space away from the plot.
  theme(legend.position = "bottom")

# A flat map is needed to be able to zoom, so the coord_sf() projection used
# before is left out here. The plot is stored in the variable `g`.
g <- world %>%
  inner_join(datos, by = c("name" = "Pais_Region")) %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot() +
  geom_sf(aes(fill = Casos_Confirmados), colour = "black") +
  scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
  labs(
    x = "Longitud", y = "Latitud",
    title = "Mapa del mundo", subtitle = "COVID19"
  )
# Turn the stored plot into an interactive one with plotly.
ggplotly(g)